
********************
* ROBUSTNESS TESTS *
********************
* Labussiere 2023

/* Produce the following outputs:

	In supplementary materials:
		Tables S11 S14 S15 S16	
*/

* reload_dataset
* Loads a fresh copy of full_sample.dta and prepares it for one table section:
*   - declares sibling_id_ma as the panel identifier (xtset)
*   - creates non_missing, which flags observations with no missing value on
*     any of the covariates passed to markout
*   - reports the size of that complete-case sample
*   - creates outcome as a copy of cito_score_st_year
* Takes no arguments. Whatever data are in memory are discarded.
capture program drop reload_dataset
program define reload_dataset

* -clear- is required: each table section adds variables (outcome, dummy
* sets, ...) to the data in memory, so a plain -use- would refuse to load
* the file on the second and later calls.
use full_sample.dta, clear

xtset sibling_id_ma

* Complete-case marker over the covariates used in the models
mark non_missing
markout non_missing gender first_born_FULREG date_birth_cat parent_ed_level_miss ///
homeowner_alo_cito q3_st_disp_income_cito secm_ma_agg2_cito secm_pa_agg2_cito ///
country_of_birth_ma_agg type_household_cito nbr_children_cito

tab non_missing
count if non_missing
disp "Multivariate sample: N = " r(N) 
* 94,727 (value noted in the original script)

gen outcome = cito_score_st_year

end program


* save_estimates name
* Saves the current estimation results to a file called name under ster/
* (overwriting any existing file), loads them back from that file, and
* stores them in memory under the same name.
capture program drop save_estimates
program define save_estimates
	args est_name
	estimates save ster/`est_name', replace
	estimates use  ster/`est_name'
	estimates store `est_name'
end program


******************************************************************
* TABLE S11
* Main models re-estimated with a naturalisation dummy
* (nat_13_censored) as the variable of interest
******************************************************************
reload_dataset

* Covariates shared by the random-effects and OLS specifications
local fv_controls i.gender i.first_born_FULREG i.date_birth_cat ///
	i.parent_ed_level_miss i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito i.country_of_birth_ma_agg ///
	ib(1).type_household_cito nbr_children_cito

* Random-effects
xtreg outcome i.nat_13_censored `fv_controls'
save_estimates RE_nat13

* OLS
regress outcome i.nat_13_censored `fv_controls'
save_estimates OLS_nat13

* Within-between model: xthybrid is given explicit 0/1 indicators, so
* every categorical covariate is first expanded into a dummy set
tabulate nat_13_censored,         generate(D_nat)
tabulate parent_ed_level_miss,    generate(D_ed_level)
tabulate q3_st_disp_income_cito,  generate(D_income)
tabulate secm_ma_agg2_cito,       generate(D_ses_ma)
tabulate secm_pa_agg2_cito,       generate(D_ses_pa)
tabulate country_of_birth_ma_agg, generate(D_country_ma)
tabulate date_birth_cat,          generate(D_date_birth)

* 1 when the household type is 2, 0 otherwise, missing when the type is missing
generate single_parent = (type_household_cito == 2) if !missing(type_household_cito)

* Date-of-birth dummies 2-9 (the first category is left out)
local date_dummies
forvalues k = 2/9 {
	local date_dummies `date_dummies' D_date_birth`k'
}

* Mother's country-of-birth dummies 1-5 and 7-9 (the sixth is left out)
local country_dummies
foreach k of numlist 1/5 7/9 {
	local country_dummies `country_dummies' D_country_ma`k'
}

xthybrid outcome D_nat2 ///
	gender first_born_FULREG D_ed_level2 D_ed_level3 D_ed_level4 ///
	D_income2 D_income3 D_income4 homeowner_alo_cito ///
	D_ses_ma2 D_ses_ma3 D_ses_ma4 D_ses_pa2 D_ses_pa3 D_ses_pa4 ///
	single_parent nbr_children_cito ///
	`date_dummies' `country_dummies' ///
	if non_missing, test clusterid(sibling_id_ma) p full
save_estimates MIXED_nat13

* Export the three models side by side
esttab OLS_nat13 RE_nat13 MIXED_nat13 using TableS11.rtf, replace ///
	ti("Comparison all main models for naturalisation dummy estimates") ///
	mtitles("Model OLS" "Model RE" "Within/Between") ///
	keep(1.nat_13_censored W__D_nat2 B__D_nat2) ///
	scalars(sigma_u sigma_e N N_g r2_w r2_b r2_o r2) ///
	modelwidth(6) label nogap nonumbers b(3) ///
	nostar p wide


******************************************************************
* TABLE S14
* Main models re-estimated with age at naturalisation censored
* at age 10 (age_nat_10_cat6, category 5 as reference)
******************************************************************
reload_dataset

* Covariates shared by the random-effects and OLS specifications
local fv_controls i.gender i.first_born_FULREG i.date_birth_cat ///
	i.parent_ed_level_miss i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito i.country_of_birth_ma_agg ///
	ib(1).type_household_cito nbr_children_cito

* Random-effects
xtreg outcome ib(5).age_nat_10_cat6 `fv_controls'
save_estimates RE_nat10censored

* OLS
regress outcome ib(5).age_nat_10_cat6 `fv_controls'
save_estimates OLS_nat10censored

* Within-between: expand every categorical covariate into a dummy set
tabulate age_nat_10_cat6,         generate(D_age_nat)
tabulate parent_ed_level_miss,    generate(D_ed_level)
tabulate q3_st_disp_income_cito,  generate(D_income)
tabulate secm_ma_agg2_cito,       generate(D_ses_ma)
tabulate secm_pa_agg2_cito,       generate(D_ses_pa)
tabulate country_of_birth_ma_agg, generate(D_country_ma)
tabulate date_birth_cat,          generate(D_date_birth)

* 1 when the household type is 2, 0 otherwise, missing when the type is missing
generate single_parent = (type_household_cito == 2) if !missing(type_household_cito)

* Age dummies 1-5 enter the model, together with the matching W__/B__
* coefficient names and factor-variable names kept in the exported table
local age_dummies
local keep_fv
local keep_within
local keep_between
forvalues k = 1/5 {
	local j = `k' - 1
	local age_dummies  `age_dummies' D_age_nat`k'
	local keep_fv      `keep_fv' `j'.age_nat_10_cat6
	local keep_within  `keep_within' W__D_age_nat`k'
	local keep_between `keep_between' B__D_age_nat`k'
}

* Date-of-birth dummies 2-9 (the first category is left out)
local date_dummies
forvalues k = 2/9 {
	local date_dummies `date_dummies' D_date_birth`k'
}

* Mother's country-of-birth dummies 1-5 and 7-9 (the sixth is left out)
local country_dummies
foreach k of numlist 1/5 7/9 {
	local country_dummies `country_dummies' D_country_ma`k'
}

xthybrid outcome `age_dummies' ///
	first_born_FULREG gender D_ed_level2 D_ed_level3 D_ed_level4 ///
	D_income2 D_income3 D_income4 homeowner_alo_cito ///
	D_ses_ma2 D_ses_ma3 D_ses_ma4 D_ses_pa2 D_ses_pa3 D_ses_pa4 ///
	single_parent nbr_children_cito ///
	`date_dummies' `country_dummies' ///
	if non_missing, test clusterid(sibling_id_ma) p full
save_estimates MIXED_nat10censored

* Export the three models side by side
esttab OLS_nat10censored RE_nat10censored MIXED_nat10censored ///
	using TableS14.rtf, replace ///
	ti("Comparison all main models with different age categories") ///
	mtitles("Model OLS" "Model RE" "Within/Between") ///
	keep(`keep_fv' `keep_within' `keep_between') ///
	modelwidth(6) label nogap nonumbers b(3) ///
	nostar p wide


******************************************************************
* TABLE S15
* Replication of the results with age at naturalisation dummies
******************************************************************
reload_dataset

* Random-effects
* NOTE(review): the base category for country_of_birth_ma_agg is set to 6
* here but left at the default in the OLS model below; this does not touch
* the age coefficients exported to the table — confirm it is intended.
xtreg outcome ib(14).age_nat_dummies i.gender i.first_born_FULREG i.date_birth_cat ///
	nbr_children_cito ib(1).type_household_cito  ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.parent_ed_level_miss ib(6).country_of_birth_ma_agg
estimates store REmodel_dummies

* OLS
regress outcome ib(14).age_nat_dummies i.gender i.first_born_FULREG i.date_birth_cat ///
	i.parent_ed_level_miss i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito i.country_of_birth_ma_agg ///
	ib(1).type_household_cito nbr_children_cito
estimates store OLSmodel_dummies

* Within-between
* reload_dataset starts from a fresh copy of the data, so the indicator
* variables used by xthybrid have to be rebuilt in this section, exactly as
* in the other table sections.
* The age dummies are created only if the dataset does not already carry
* them. Assumes age_nat_dummies takes the 15 values 0-14, so that
* D_age_nat_ctn1-14 map to ages 0-13 and the omitted 15th dummy is the
* reference category 14 — TODO confirm.
capture confirm variable D_age_nat_ctn1
if _rc {
	tabulate age_nat_dummies, generate(D_age_nat_ctn)
}
tabulate parent_ed_level_miss, generate(D_ed_level)
tabulate q3_st_disp_income_cito, generate(D_income)
tabulate secm_ma_agg2_cito, generate(D_ses_ma)
tabulate secm_pa_agg2_cito, generate(D_ses_pa)
tabulate country_of_birth_ma_agg, generate(D_country_ma)
tabulate date_birth_cat, generate(D_date_birth)
gen single_parent = (type_household_cito == 2)
replace single_parent = . if missing(type_household_cito)

xthybrid outcome D_age_nat_ctn1 D_age_nat_ctn2 D_age_nat_ctn3          ///
D_age_nat_ctn4 D_age_nat_ctn5 D_age_nat_ctn6 D_age_nat_ctn7            ///
D_age_nat_ctn8 D_age_nat_ctn9 D_age_nat_ctn10 D_age_nat_ctn11          ///
D_age_nat_ctn12 D_age_nat_ctn13 D_age_nat_ctn14    			           ///
          gender first_born_FULREG D_ed_level2 D_ed_level3 D_ed_level4    ///
		  D_income2 D_income3 D_income4 homeowner_alo_cito             /// 
		  D_ses_ma2 D_ses_ma3 D_ses_ma4 D_ses_pa2 D_ses_pa3 D_ses_pa4  ///
								    single_parent nbr_children_cito	   ///
			D_date_birth2 D_date_birth3 D_date_birth4 D_date_birth5    ///
			D_date_birth6 D_date_birth7 D_date_birth8 D_date_birth9    ///
			D_country_ma1 D_country_ma2 D_country_ma3 D_country_ma4    ///
			D_country_ma5 D_country_ma7 D_country_ma8 D_country_ma9    ///
if non_missing, test clusterid(sibling_id_ma) p full
estimates store MIXEDmodel_dummies

* Export comparison output
* Models are listed as OLS, RE, within-between so that the columns line up
* with the mtitles() labels (and with the other tables in this file).
esttab OLSmodel_dummies REmodel_dummies MIXEDmodel_dummies ///
  using TableS15.rtf, ///
replace ti("Comparison all main models for age at naturalisation estimates") ///
mtitles("Model OLS" "Model RE" "Within/Between") ///
keep(0.age_nat_dummies 1.age_nat_dummies 2.age_nat_dummies 3.age_nat_dummies ///
4.age_nat_dummies 5.age_nat_dummies 6.age_nat_dummies 7.age_nat_dummies ///
8.age_nat_dummies 9.age_nat_dummies 10.age_nat_dummies 11.age_nat_dummies ///
12.age_nat_dummies 13.age_nat_dummies ///
W__D_age_nat_ctn1 W__D_age_nat_ctn2 W__D_age_nat_ctn3 W__D_age_nat_ctn4 ///
W__D_age_nat_ctn5 W__D_age_nat_ctn6 W__D_age_nat_ctn7 W__D_age_nat_ctn8 ///
W__D_age_nat_ctn9 W__D_age_nat_ctn10 W__D_age_nat_ctn11 W__D_age_nat_ctn12 ///
W__D_age_nat_ctn13 W__D_age_nat_ctn14 					    			   ///
B__D_age_nat_ctn1 B__D_age_nat_ctn2 B__D_age_nat_ctn3 B__D_age_nat_ctn4 ///
B__D_age_nat_ctn5 B__D_age_nat_ctn6 B__D_age_nat_ctn7 B__D_age_nat_ctn8 ///
B__D_age_nat_ctn9 B__D_age_nat_ctn10 B__D_age_nat_ctn11 B__D_age_nat_ctn12 ///
B__D_age_nat_ctn13 B__D_age_nat_ctn14									///
) ///
modelwidth(6) label nogap nonumbers b(3)  ///
nostar p wide


******************************************************************
* TABLE S16
* Main models re-estimated with an alternative 6-category grouping
* of age at naturalisation (age_nat_13_cat6, category 5 as reference)
******************************************************************
reload_dataset

* Alternative grouping of age_nat_13_censored: start from missing and fill
* in one category per age band; values outside every band stay missing
generate age_nat_13_cat6 = .
replace  age_nat_13_cat6 = 0 if age_nat_13_censored == 0
replace  age_nat_13_cat6 = 1 if inrange(age_nat_13_censored, 1, 2)
replace  age_nat_13_cat6 = 2 if inrange(age_nat_13_censored, 3, 5)
replace  age_nat_13_cat6 = 3 if inrange(age_nat_13_censored, 6, 10)
replace  age_nat_13_cat6 = 4 if inrange(age_nat_13_censored, 11, 13)
replace  age_nat_13_cat6 = 5 if age_nat_13_censored == 14

label define agecat_6 0 "0" 1 "1-2" 2 "3-5" 3 "6-10" 4 "11-13" 5 "14+", replace
label values age_nat_13_cat6 agecat_6
label variable age_nat_13_cat6 "Age at naturalisation censored after 13 (6 categories, 14+)"

* Covariates shared by the random-effects and OLS specifications
local fv_controls i.gender i.first_born_FULREG i.date_birth_cat ///
	i.parent_ed_level_miss i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito i.country_of_birth_ma_agg ///
	ib(1).type_household_cito nbr_children_cito

* Random-effects
xtreg outcome ib(5).age_nat_13_cat6 `fv_controls'
save_estimates RE_altagegp

* OLS
regress outcome ib(5).age_nat_13_cat6 `fv_controls'
save_estimates OLS_altagegp

* Within-between: expand every categorical covariate into a dummy set
tabulate age_nat_13_cat6,         generate(D_age_nat)
tabulate parent_ed_level_miss,    generate(D_ed_level)
tabulate q3_st_disp_income_cito,  generate(D_income)
tabulate secm_ma_agg2_cito,       generate(D_ses_ma)
tabulate secm_pa_agg2_cito,       generate(D_ses_pa)
tabulate country_of_birth_ma_agg, generate(D_country_ma)
tabulate date_birth_cat,          generate(D_date_birth)

* 1 when the household type is 2, 0 otherwise, missing when the type is missing
generate single_parent = (type_household_cito == 2) if !missing(type_household_cito)

* Age dummies 1-5 enter the model, together with the matching W__/B__
* coefficient names and factor-variable names kept in the exported table
local age_dummies
local keep_fv
local keep_within
local keep_between
forvalues k = 1/5 {
	local j = `k' - 1
	local age_dummies  `age_dummies' D_age_nat`k'
	local keep_fv      `keep_fv' `j'.age_nat_13_cat6
	local keep_within  `keep_within' W__D_age_nat`k'
	local keep_between `keep_between' B__D_age_nat`k'
}

* Date-of-birth dummies 2-9 (the first category is left out)
local date_dummies
forvalues k = 2/9 {
	local date_dummies `date_dummies' D_date_birth`k'
}

* Mother's country-of-birth dummies 1-5 and 7-9 (the sixth is left out)
local country_dummies
foreach k of numlist 1/5 7/9 {
	local country_dummies `country_dummies' D_country_ma`k'
}

xthybrid outcome `age_dummies' ///
	first_born_FULREG gender D_ed_level2 D_ed_level3 D_ed_level4 ///
	D_income2 D_income3 D_income4 homeowner_alo_cito ///
	D_ses_ma2 D_ses_ma3 D_ses_ma4 D_ses_pa2 D_ses_pa3 D_ses_pa4 ///
	single_parent nbr_children_cito ///
	`date_dummies' `country_dummies' ///
	if non_missing, test clusterid(sibling_id_ma) p full
save_estimates MIXED_altagegp

* Export the three models side by side
esttab OLS_altagegp RE_altagegp MIXED_altagegp ///
	using TableS16.rtf, replace ///
	ti("Comparison all main models with different age categories") ///
	mtitles("Model OLS" "Model RE" "Within/Between") ///
	keep(`keep_fv' `keep_within' `keep_between') ///
	modelwidth(6) label nogap nonumbers b(3) ///
	nostar p wide









